# 3장 데이터 시각화 함수
# 3.1.2 벡터라이제이션 : 벡터의 연산 과정인 입력, 연산, 출력에서 벡터를 원소 하나씩 반복적으로 처리하지 않고 통째로 처리하는 기법
# 3.2 graphics 패키지
# 3.2.1 barplot() 함수
## https://www.statmethods.net/graphs/bar.html
# 막대그림
counts <- table(mtcars$gear)
barplot(counts, main="Car Distribution", xlab="Number of Gears")

# 수평 막대그림
counts <- table(mtcars$gear)
barplot(counts, main="Car Distribution", horiz=TRUE,
names.arg=c("3 Gears", "4 Gears", "5 Gears"))

# 벡터가 아닌 행렬자료는 비중까지 그려짐
counts <- table(mtcars$vs, mtcars$gear)
barplot(counts, main="Car Distribution by Gears and VS",
xlab="Number of Gears", col=c("darkblue","red"),
legend = rownames(counts))

# beside 옵션을 주면 비중이 아닌 집단 막대그림
counts <- table(mtcars$vs, mtcars$gear)
barplot(counts, main="Car Distribution by Gears and VS",
xlab="Number of Gears", col=c("darkblue","red"),
legend = rownames(counts), beside=TRUE)

counts <- table(mtcars$gear)
barplot(counts, main="Car Distribution", horiz=TRUE, names.arg=c("3 Gears", "4 Gears", "5 Gears"), cex.names=0.8)
par(las=2) # make label text perpendicular to axis
barplot(counts, main="Car Distribution", horiz=TRUE, names.arg=c("3 Gears", "4 Gears", "5 Gears"), cex.names=0.8)

par(las=1)
# 막대그림의 옵셥들
set.seed(1)
bar.x <- round(runif(12) * 50)
bar.x
## [1] 13 19 29 45 10 45 47 33 31 3 10 9
barplot(bar.x) # 막대그림은 빈도 자료의 벡터를 사용하여 그림
title(main = "Vector Barplot")

barplot(-bar.x) # 음수로 줄 경우 상하가 반전
title(main = "Vector Barplot(Negative Value)")

set.seed(2);bar.y <- matrix(bar.x, ncol = 3, byrow = T)
bar.y
## [,1] [,2] [,3]
## [1,] 13 19 29
## [2,] 45 10 45
## [3,] 47 33 31
## [4,] 3 10 9
barplot(bar.y, main = "Matrix Barplot")

barplot(bar.y, beside = TRUE, main = "Matrix Barplot by beside = TRUE")

bar.width <- rep(1:3, 4)
bar.width
## [1] 1 2 3 1 2 3 1 2 3 1 2 3
barplot(bar.x, width = bar.width) #막대의 폭 변경
title(main = "Vector Barplot by width 1:3")

barplot(bar.x, space = 2) # 막대 그림 사이의 공간
title(main = "Vector Barplot by space = 2")

barplot(bar.y, beside = TRUE, space = c(0.5, 2)) # 그룹별 공간을 준 그림
title(main="Vector Barplot by space = c(0.5, 2)")

rownames(bar.y) <- paste("row", 1:4)
colnames(bar.y) <- paste("col", 1:3)
bar.y #행렬에 이름이 있으면...
## col 1 col 2 col 3
## row 1 13 19 29
## row 2 45 10 45
## row 3 47 33 31
## row 4 3 10 9
barplot(bar.y)
title(main = "Matrix Barplot using default names.arg")

barplot(bar.x, names.arg = letters[1:length(bar.x)])
title(main = "Vector Barplot using names.arg")

barplot(bar.x, legend.text = letters[1:length(bar.x)])
title(main = "Vector Barplot using legend.text")

barplot(bar.y, legend.text = T)
title(main = "Matrix Barplot using legend.text = T")

barplot(bar.x, horiz = T, density = 5)
title(main = "Vector Barplot by horiz = T, density = 5")

barplot(bar.x, density = 15, angle = 135)
title(main = "Vector Barplot by density = 15, angle = 135")

barplot(bar.x, col = rainbow(length(bar.x)))
title(main = "Vector Barplot by rainbow color")

barplot(bar.y, border = "red",
col = c("lightblue", "mistyrose","lightcyan", "lavender"))
title(main = "Matrix Barplot by col, border")

barplot(bar.x, axes = FALSE)
title(main = "Vector Barplot by axes = FALSE")

barplot(bar.y, cex.axis = 1.8, ylim = c(0, 90))
title(main = "Matrix Barplot by cex.axis,ylim")

barplot(bar.y, axisnames = T, cex.names = 1.8, axis.lty = 2)
title(main = "Matrix Barplot by cex.names, axis.lty")

# 3.2.2 boxplot() 함수
# https://www.statmethods.net/graphs/boxplot.html
# Boxplot of MPG by Car Cylinders
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
str(mtcars)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
boxplot(mpg~cyl,data=mtcars, main="Car Milage Data",
xlab="Number of Cylinders", ylab="Miles Per Gallon")

# Notched Boxplot of Tooth Growth Against 2 Crossed Factors
# boxes colored for ease of interpretation
str(ToothGrowth)
## 'data.frame': 60 obs. of 3 variables:
## $ len : num 4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
## $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
## $ dose: num 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
head(ToothGrowth)
## len supp dose
## 1 4.2 VC 0.5
## 2 11.5 VC 0.5
## 3 7.3 VC 0.5
## 4 5.8 VC 0.5
## 5 6.4 VC 0.5
## 6 10.0 VC 0.5
boxplot(len~supp*dose, data=ToothGrowth, notch=TRUE,
col=(c("gold","darkgreen")),
main="Tooth Growth", xlab="Suppliment and Dose")
## Warning in bxp(list(stats = structure(c(8.2, 9.7, 12.25, 16.5, 21.5, 4.2, :
## some notches went outside hinges ('box'): maybe set notch=FALSE
# Violin Plots
if(!require(vioplot)) install.packages("vioplot"); library(vioplot)
## Loading required package: vioplot
## Loading required package: sm
## Package 'sm', version 2.2-5.6: type help(sm) for summary information
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric

x1 <- mtcars$mpg[mtcars$cyl==4]
x2 <- mtcars$mpg[mtcars$cyl==6]
x3 <- mtcars$mpg[mtcars$cyl==8]
vioplot(x1, x2, x3, names=c("4 cyl", "6 cyl", "8 cyl"), col="gold")
title("Violin Plots of Miles Per Gallon")

# Bagplot - A 2D Boxplot Extension
# Violin Plots
# Example of a Bagplot
if(!require(aplpack)) install.packages("aplpack"); library(aplpack)
## Loading required package: aplpack
## Loading required package: tcltk
bagplot(mtcars$wt,mtcars$mpg,
xlab="Car Weight", ylab="Miles Per Gallon",
main="Bagplot Example")

# 상자그림의 옵션
set.seed(1)
norm1 <- round(rnorm(100, 3, 2), digits = 2)
set.seed(2)
norm2 <- round(rnorm(100, 3, 3), digits = 2)
boxplot(norm1, main="boxplot of one vector")

boxplot(norm1, norm2, main="boxplot of two vectors")

# 리스트로 저장된 자료도 상자 그림으로 그려짐
list1 = list(data1 = norm1, data2 = norm2, data3 = rnorm(100, 7, 4))
list1
## $data1
## [1] 1.75 3.37 1.33 6.19 3.66 1.36 3.97 4.48 4.15 2.39 6.02
## [12] 3.78 1.76 -1.43 5.25 2.91 2.97 4.89 4.64 4.19 4.84 4.56
## [23] 3.15 -0.98 4.24 2.89 2.69 0.06 2.04 3.84 5.72 2.79 3.78
## [34] 2.89 0.25 2.17 2.21 2.88 5.20 4.53 2.67 2.49 4.39 4.11
## [45] 1.62 1.59 3.73 4.54 2.78 4.76 3.80 1.78 3.68 0.74 5.87
## [56] 6.96 2.27 0.91 4.14 2.73 7.80 2.92 4.38 3.06 1.51 3.38
## [67] -0.61 5.93 3.31 7.35 3.95 1.58 4.22 1.13 0.49 3.58 2.11
## [78] 3.00 3.15 1.82 1.86 2.73 5.36 -0.05 4.19 3.67 5.13 2.39
## [89] 3.74 3.53 1.91 5.42 5.32 4.40 6.17 4.12 0.45 1.85 0.55
## [100] 2.05
##
## $data2
## [1] 0.31 3.55 7.76 -0.39 2.76 3.40 5.12 2.28 8.95 2.58 4.25
## [12] 5.95 1.82 -0.12 8.35 -3.93 5.64 3.11 6.04 4.30 9.27 -0.60
## [23] 7.77 8.86 3.01 -4.36 4.43 1.21 5.38 3.87 5.22 3.96 6.23
## [34] 2.15 0.67 1.21 -2.18 0.29 1.32 2.26 1.85 -2.88 0.47 8.71
## [45] 4.87 8.97 2.08 2.73 2.45 -0.60 0.49 9.20 1.31 6.83 -0.14
## [56] -2.90 2.03 5.81 6.42 8.01 -2.36 9.09 0.89 3.47 4.52 0.54
## [67] -3.00 1.56 3.25 0.31 0.24 3.99 2.58 4.30 2.84 0.28 6.91
## [78] 5.32 6.16 -1.23 5.99 -2.09 1.40 -1.12 -3.62 8.47 1.04 2.15
## [89] 1.84 4.16 7.80 8.04 -0.55 -1.08 -1.54 -0.76 8.88 3.02 0.47
## [100] 1.20
##
## $data3
## [1] 11.29783763 8.04239134 5.74291208 4.00147962 3.55120668
## [6] 15.19216121 10.75968031 15.03474846 5.31450571 5.59666231
## [11] 2.89047761 5.99792349 8.88743786 12.43575928 9.25667441
## [16] 8.82392036 11.92381465 11.58854739 7.42639216 3.86673337
## [21] 11.96479931 7.55543368 13.84252635 5.27743610 2.82308168
## [26] 9.15031810 4.32165605 9.55522245 0.10404066 0.03027968
## [31] 9.75921669 8.32385271 10.48427084 -1.06498233 11.85031641
## [36] 11.80197880 11.12827330 10.14564102 15.44029406 1.18476061
## [41] 4.66758461 8.63889593 3.77207346 7.34220176 9.98497267
## [46] 4.38530775 9.62842393 9.19963694 3.77308257 3.01048113
## [51] 10.90356255 6.32230728 9.88876712 3.62232557 12.10917474
## [56] 1.62755780 10.06136268 8.85681028 8.07197311 9.67009075
## [61] 8.59386914 4.44771588 5.92914838 8.43951826 1.74853562
## [66] 3.46412156 15.30837918 -1.39690253 2.04597614 10.96173236
## [71] 11.35464745 10.35940902 7.22743455 8.29551220 3.38132533
## [76] 4.39126461 5.95018145 3.26134864 10.28464485 0.50296331
## [81] 2.87838532 1.95228275 8.56873850 2.47424695 9.17657794
## [86] 11.70643574 7.10091428 9.06053268 4.38356096 9.01456796
## [91] 1.91152311 6.69291539 1.61872250 5.93472976 11.35025198
## [96] 9.80227118 5.22896194 3.84592014 3.57289716 4.01432394
boxplot(list1)
title("boxplot of simple list")

InsectSprays
## count spray
## 1 10 A
## 2 7 A
## 3 20 A
## 4 14 A
## 5 14 A
## 6 12 A
## 7 10 A
## 8 23 A
## 9 17 A
## 10 20 A
## 11 14 A
## 12 13 A
## 13 11 B
## 14 17 B
## 15 21 B
## 16 11 B
## 17 16 B
## 18 14 B
## 19 17 B
## 20 17 B
## 21 19 B
## 22 21 B
## 23 7 B
## 24 13 B
## 25 0 C
## 26 1 C
## 27 7 C
## 28 2 C
## 29 3 C
## 30 1 C
## 31 2 C
## 32 1 C
## 33 3 C
## 34 0 C
## 35 1 C
## 36 4 C
## 37 3 D
## 38 5 D
## 39 12 D
## 40 6 D
## 41 4 D
## 42 3 D
## 43 5 D
## 44 5 D
## 45 5 D
## 46 5 D
## 47 2 D
## 48 4 D
## 49 3 E
## 50 5 E
## 51 3 E
## 52 5 E
## 53 3 E
## 54 6 E
## 55 1 E
## 56 1 E
## 57 3 E
## 58 2 E
## 59 6 E
## 60 4 E
## 61 11 F
## 62 9 F
## 63 15 F
## 64 22 F
## 65 15 F
## 66 16 F
## 67 13 F
## 68 10 F
## 69 26 F
## 70 26 F
## 71 24 F
## 72 13 F
dim(InsectSprays)
## [1] 72 2
unique(InsectSprays$spray)
## [1] A B C D E F
## Levels: A B C D E F
boxplot(count ~ spray, data = InsectSprays, col = "lightgray")
title("boxplot of dataframe by formula")

head(ToothGrowth)
## len supp dose
## 1 4.2 VC 0.5
## 2 11.5 VC 0.5
## 3 7.3 VC 0.5
## 4 5.8 VC 0.5
## 5 6.4 VC 0.5
## 6 10.0 VC 0.5
boxplot(len ~ dose, data = ToothGrowth, main="len ~ dose")

boxplot(len ~ supp, data = ToothGrowth, main="len ~ supp")

boxplot(len ~ dose + supp, data = ToothGrowth, main="len ~ dose + supp")

boxplot(len ~ supp == "VC", data = ToothGrowth) #원하는 자료만 선택
title("len ~ supp == \"VC\"")

boxplot(len ~ dose, data = ToothGrowth, subset = supp == "VC") # subset 옵션 사용 가능
title("len ~ dose, subset = supp == \"VC\"")

boxplot(len[supp == "VC"] ~ dose[supp == "VC"], data = ToothGrowth)
title("len[supp == \"VC\"] ~ dose[supp == \"VC\"]")

set.seed(3)
z <- round(rnorm(50) * 10)
summary(z)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -23.00 -7.00 -1.50 -0.66 7.00 17.00
z[50] <- 40 # 50번째 데이터를 40으로 치환하여 이상치를 만듦
summary(z)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -23.00 -7.00 -0.50 0.32 7.75 40.00
boxplot(z)
title(main="range = default(1.5)")

boxplot(z, range = 0) # 이상치 없이 최대 최소 모두 그리기
title(main="range = 0")

boxplot(z, range = 1.0) # IQR 범위까지 상자 그리기기
title(main="range = 1.0")

boxplot(z, range = 2.0) # IQR * 2 범위까지 상자 그리기
title(main="range = 2.0")

x1 <- runif(20)
x2 <- runif(20)
x3 <- runif(20)
x4 <- runif(20)
x5 <- runif(20)
x <- list(x1, x2, x3, x4, x5)
y1 <- runif(10)
y2 <- runif(40)
y3 <- runif(90)
y4 <- runif(160)
y <- list(y1, y2, y3, y4)
boxplot(x)
title(main = "default")

boxplot(x, width = 1:5)
title(main = "width = 1:5")

str(y)
## List of 4
## $ : num [1:10] 0.731 0.86 0.343 0.493 0.854 ...
## $ : num [1:40] 0.7285 0.4766 0.5819 0.2071 0.0289 ...
## $ : num [1:90] 0.223 0.255 0.65 0.617 0.253 ...
## $ : num [1:160] 0.593 0.282 0.338 0.498 0.594 ...
boxplot(y, varwidth = T)
title(main = "varwidth = T")

boxplot(y, varwidth = T, width = 4:1)
title(main = "varwidth = T & width = 4:1")

boxplot(y)
title(main = "notch = default(FALSE)")

boxplot(y, notch = T, main = "notch = TRUE")
## Warning in bxp(list(stats = structure(c(0.0328013296239078,
## 0.38725300040096, : some notches went outside hinges ('box'): maybe set
## notch=FALSE

boxplot(z, main = "outline = default(TRUE)")

boxplot(z, outline = F, main = "outline = FALSE")

# names 인수를 사용할 경우
xname <- c("x1", "x2", "x3", "x4", "x5")
boxplot(x, names = xname)
title(main = "using names argument")

# names attributes를 이용할 경우
names(x) <- c("x1", "x2", "x3", "x4", "x5")
boxplot(x)
title(main = "using names attributes")

boxplot(x, boxwex = 0.3)
title(main = "boxwex = 1")

boxplot(x, staplewex = 2)
title(main = "staplewex = 2")

boxplot(x, plot = FALSE)
## $stats
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.04117032 0.09869789 0.03852075 0.1087810 0.09831946
## [2,] 0.18077793 0.31802044 0.23061703 0.4298766 0.26834736
## [3,] 0.37115586 0.51577862 0.46259269 0.5765672 0.34477785
## [4,] 0.69219256 0.67721069 0.72621593 0.7847074 0.63193596
## [5,] 0.91342360 0.87987004 0.88821092 0.9586770 0.90653142
##
## $n
## [1] 20 20 20 20 20
##
## $conf
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.1904737 0.3888772 0.2874982 0.4512060 0.2163224
## [2,] 0.5518380 0.6426801 0.6376872 0.7019285 0.4732332
##
## $out
## numeric(0)
##
## $group
## numeric(0)
##
## $names
## [1] "x1" "x2" "x3" "x4" "x5"
boxplot(z, plot = FALSE)
## $stats
## [,1]
## [1,] -23.0
## [2,] -7.0
## [3,] -0.5
## [4,] 8.0
## [5,] 17.0
##
## $n
## [1] 50
##
## $conf
## [,1]
## [1,] -3.851686
## [2,] 2.851686
##
## $out
## [1] 40
##
## $group
## [1] 1
##
## $names
## [1] "1"
boxplot(x, border = "magenta", col = c("lightblue", "mistyrose", "lightcyan", "lavender"))
title(main = "use border, col")

boxplot(x, horizontal = TRUE)
title(main = "horizontal = TRUE")

# at을 통해 원하는 위치에 상자그림을 그릴 수 있음
boxplot(len ~ dose, data = ToothGrowth, boxwex = 0.25, at = 1:3 - 0.2,
subset = supp == "VC", col = "yellow", main = "Guinea Pigs' Tooth Growth",
xlab = "Vitamin C dose mg", ylab = "tooth length", ylim = c(0, 35))
boxplot(len ~ dose, data = ToothGrowth, add = TRUE, boxwex = 0.25, at = 1:3 + 0.2,
subset = supp == "OJ", col = "orange")
legend(2, 9, c("Ascorbic acid", "Orange juice"), fill = c("yellow", "orange"))

# 참고로 알아두면 좋은 옵션들
boxplot(x, log = "y", main = "log = \"y\"")

boxplot(x, log = "x", main = "log = \"x\"")

boxplot(x)
boxplot(y, add = TRUE)
title(main = "add = TRUE(y is added to x)")

boxplot(y, staplelty = 3)
title(main = "staplelty = 3")

boxplot(z, outpch = 2)
title(main = "outpch = 2")

# 3.2.3 dotchart() 함수
# https://www.statmethods.net/graphs/dot.html
# Simple Dotplot
dotchart(mtcars$mpg,labels=row.names(mtcars),cex=.7,
main="Gas Milage for Car Models",
xlab="Miles Per Gallon")

# Dotplot: Grouped Sorted and Colored
# Sort by mpg, group and color by cylinder
x <- mtcars[order(mtcars$mpg),] # sort by mpg
x$cyl <- factor(x$cyl) # it must be a factor
x$color[x$cyl==4] <- "red"
x$color[x$cyl==6] <- "blue"
x$color[x$cyl==8] <- "darkgreen"
dotchart(x$mpg,labels=row.names(x),cex=.7,groups= x$cyl,
main="Gas Milage for Car Models\ngrouped by cylinder",
xlab="Miles Per Gallon", gcolor="black", color=x$color)

# dotchart의 다양한 옵션
month <- matrix(1:12, ncol = 3)
rownames(month) <- paste("Row", 1:4)
colnames(month) <- paste("Col", 1:3)
month
## Col 1 Col 2 Col 3
## Row 1 1 5 9
## Row 2 2 6 10
## Row 3 3 7 11
## Row 4 4 8 12
# (1) 벡터
dotchart(as.vector(month), label = month.abb)
title(main = "x is a vector")

# (2) 행렬
dotchart(month)
title(main = "x is a matrix")

# (3) group
quarter.name <- c("1QT", "2QT", "3QT", "4QT")
quarter <- factor(row(month), label = quarter.name)
quarter
## [1] 1QT 2QT 3QT 4QT 1QT 2QT 3QT 4QT 1QT 2QT 3QT 4QT
## Levels: 1QT 2QT 3QT 4QT
dotchart(month, groups = quarter)
title(main = "groups = quarter")

# (4) groups, labels
name <- c("1st", "2nd", "3rd")
month
## Col 1 Col 2 Col 3
## Row 1 1 5 9
## Row 2 2 6 10
## Row 3 3 7 11
## Row 4 4 8 12
dotchart(month, groups = quarter, labels = name)
title(main = "groups = quarter, labels = name")

dotchart(month, group = quarter, labels = month.abb)
title(main = "group=quarter, labels=month.abb")

gmean <- tapply(month, quarter, mean)
gmean
## 1QT 2QT 3QT 4QT
## 5 6 7 8
dotchart(month, group = quarter, labels = month.abb, gdata = gmean)
title(main = "group=quarter, labels=month.abbngdata=gmean")

dotchart(month, labels = month.abb, main = "default cex")

dotchart(month, labels = month.abb, cex = 1.1, main = "cex = 1.1")

dotchart(month, labels = month.abb, pch = 2, main = "pch = 2")

dotchart(month, labels = month.abb, groups = quarter, pch = 2, gpch = 5, gdata = gmean)
title(main = "pch = 2, gpch = 5")

dotchart(month, cex = 1.1, bg = "red", main = "bg = \"red\"")

dotchart(month, cex = 1.1, bg = "red", color = "blue")
title(main = "bg = \"red\", color = \"blue\"")

dotchart(month, cex = 1.1, color = "red", gcolor = "blue", groups = quarter,
gdata = gmean)
title(main = "color = \"red\", gcolor = \"blue\"")

dotchart(month, cex = 1.1, lcolor = "red", gcolor = "blue", groups = quarter,
gdata = gmean)
title(main = "lcolor = \"red\", gcolor = \"blue\"")

VADeaths
## Rural Male Rural Female Urban Male Urban Female
## 50-54 11.7 8.7 15.4 8.4
## 55-59 18.1 11.7 24.3 13.6
## 60-64 26.9 20.3 37.0 19.3
## 65-69 41.0 30.9 54.6 35.1
## 70-74 66.0 54.3 71.1 50.0
dotchart(VADeaths)
title(main = "Death Rates in Virginian(Population group)")

dotchart(t(VADeaths), xlim = c(0, 100))
title(main = "Death Rates in Virginian(Age group)")

# 3.2.4 hist() 함수
# https://www.statmethods.net/graphs/density.html
# Simple Histogram
hist(mtcars$mpg)

# Colored Histogram with Different Number of Bins
hist(mtcars$mpg, breaks=12, col="red")

# Add a Normal Curve
x <- mtcars$mpg
h<-hist(x, breaks=10, col="red", xlab="Miles Per Gallon",
main="Histogram with Normal Curve")
xfit<-seq(min(x),max(x),length=40)
yfit<-dnorm(xfit,mean=mean(x),sd=sd(x))
yfit <- yfit*diff(h$mids[1:2])*length(x)
lines(xfit, yfit, col="blue", lwd=2)

# Kernel Density Plot
d <- density(mtcars$mpg) # returns the density data
plot(d) # plots the results

# Filled Density Plot
d <- density(mtcars$mpg)
plot(d, main="Kernel Density of Miles Per Gallon")
polygon(d, col="red", border="blue")

# Compare MPG distributions for cars with
#4,6, or 8 cylinders
if(!require(sm)) install.packages("sm"); library(sm)
attach(mtcars)
# create value labels
cyl.f <- factor(cyl, levels= c(4,6,8),
labels = c("4 cylinder", "6 cylinder", "8 cylinder"))
# plot densities
sm.density.compare(mpg, cyl, xlab="Miles Per Gallon")
title(main="MPG Distribution by Car Cylinders")
# add legend via mouse click
colfill<-c(2:(2+length(levels(cyl.f))))
legend("topright", levels(cyl.f), fill=colfill)

detach()
set.seed(7)
hist.data <- rnorm(100, 3, 2)
hist.data <- round(hist.data, digits = 2)
summary(hist.data)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.570 1.880 3.210 3.278 4.442 8.430
# # Sturges 공식으로 구해진 계급의 수
# class.n <- ceiling(log(length(hist.data), base = 2) +1 )
# class.n
#
# # pretty 함수로 구한 breaks
# hist.breaks <- pretty(hist.data, class.n)
# hist.breaks
#
# hist(hist.data, main = "breaks = default")
# hist(hist.data, breaks = class.n, main = "nclass = class.n")
# hist(hist.data, breaks = hist.breaks, main = "breaks = hist.breaks")
# hist(hist.data, nclass = hist.breaks, main = "nclass = hist.breaks")
#
# # 도수분포표 계산
# freq <- integer(length(hist.breaks) - 1)
# for (i in seq(freq)) {
# freq[i] <- sum(hist.breaks[i] < hist.data & hist.data <= hist.breaks[i + 1])
# }
# freq
#
# nclass.Sturges(hist.data)
# nclass.scott(hist.data)
# nclass.FD(hist.data)
# pretty(hist.data, nclass.Sturges(hist.data))
# pretty(hist.data, nclass.scott(hist.data))
# pretty(hist.data, nclass.FD(hist.data))
# pretty(hist.data, 10)
#
# hist(hist.data, breaks = "Sturges", main = "breaks = \"Sturges\"")
# hist(hist.data, breaks = "Scott", main = "breaks = \"Scott\"")
# hist(hist.data, breaks = nclass.FD, main = "breaks = nclass.FD")
# hist(hist.data, breaks = 10, main = "breaks = 10")
#
#
# hist.interval <- cut(hist.data, breaks = hist.breaks)
# hist.interval
# table(hist.interval)
hist(hist.data, labels = T, main = "freq = default")

hist(hist.data, freq = F, labels = T, main = "freq = FALSE, labels = T")

hist(hist.data, probability = TRUE, main = "probability = TRUE")

hist(hist.data, labels = LETTERS[1:10], main = "labels = LETTERS[1:10]")

hist(hist.data, right = FALSE, main = "right = FALSE")

hist(hist.data, xlim = c(1, 5), main = "xlim = c(1, 5)")

hist(hist.data, density = 20, col = "red", angle = 135, border = "blue")

hist(hist.data, axes = FALSE, main = "axes = FALSE")

hist(hist.data, plot = FALSE)
## $breaks
## [1] -1 0 1 2 3 4 5 6 7 8 9
##
## $counts
## [1] 4 5 17 20 20 18 8 3 4 1
##
## $density
## [1] 0.04 0.05 0.17 0.20 0.20 0.18 0.08 0.03 0.04 0.01
##
## $mids
## [1] -0.5 0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5 8.5
##
## $xname
## [1] "hist.data"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
# 3.2.5 pie() 함수
# https://www.statmethods.net/graphs/pie.html
# Simple Pie Chart
slices <- c(10, 12,4, 16, 8)
lbls <- c("US", "UK", "Australia", "Germany", "France")
pie(slices, labels = lbls, main="Pie Chart of Countries")

# Pie Chart with Percentages
slices <- c(10, 12, 4, 16, 8)
lbls <- c("US", "UK", "Australia", "Germany", "France")
pct <- round(slices/sum(slices)*100)
lbls <- paste(lbls, pct)# add percents to labels
lbls <- paste(lbls,"%",sep="") # ad % to labels
pie(slices,labels = lbls, col=rainbow(length(lbls)),
main="Pie Chart of Countries")

# 3D Exploded Pie Chart
if(!require(plotrix)) install.packages("plotrix"); library(plotrix)
## Loading required package: plotrix
slices <- c(10, 12, 4, 16, 8)
lbls <- c("US", "UK", "Australia", "Germany", "France")
pie3D(slices,labels=lbls,explode=0.1,
main="Pie Chart of Countries ")

# Pie Chart from data frame with Appended Sample Sizes
mytable <- table(iris$Species)
lbls <- paste(names(mytable), "\n", mytable, sep="")
pie(mytable, labels = lbls,
main="Pie Chart of Species\n (with sample sizes)")

# 원도표의 옵션
set.seed(5)
pie.data <- sample(7)
pie.data
## [1] 2 3 1 6 4 5 7
pie(pie.data, main = "default")

pie(pie.data, labels = LETTERS[1:7], main = "labels = LETTERS[1:7]")

pie(pie.data, edges = 10, main = "edges = 10")

pie(pie.data, edges = 20, main = "edges = 20")

pie(pie.data, main = "default radius")

pie(pie.data, radius = 0.5, main = "radius = 0.5")

pie(pie.data, radius = 1.5, main = "radius = 1.5")

pie(pie.data, radius = 0, main = "radius = 0")

# 3.2.6 stripchart() 함수 *
set.seed(1)
x <- round(rnorm(50), 1)
set.seed(2)
y <- round(rnorm(50), 1)
set.seed(3)
z <- round(rnorm(50), 1)
strip.data <- list(x, y, z)
stripchart(x, main = "a single vector") # 벡터

stripchart(strip.data, main = "a list having 3 vectors") # 리스트

with(OrchardSprays, stripchart(decrease ~ treatment, # formula
main = "formula decrease ~ treatment ", xlab = "treatment", ylab = "decrease"))

set.seed(3)
x <- rnorm(50)
xr <- round(x, 1)
stripchart(x)
m <- mean(par("usr")[1:2])
text(m, 1.04, "stripchart(x, \"overplot\")")
stripchart(xr, method = "stack", add = TRUE, at = 1.2)
text(m, 1.35, "stripchart(round(x, 1), \"stack\")")
stripchart(xr, method = "jitter", add = TRUE, at = 0.7)
text(m, 0.85, "stripchart(round(x,1), \"jitter\")")

with(OrchardSprays, stripchart(decrease ~ treatment, method = "jitter",
jitter = 0.2, col = "red", pch = 16, cex = 1.5, vertical = TRUE, log = "y",
main="stripchart(Orchardsprays)", xlab = "treatment", ylab = "decrease",
group.names = paste(LETTERS[1:8], "group")))

with(OrchardSprays, stripchart(decrease ~ treatment, method = "stack",
offset = 1/2, col = 1:8, pch = 15, cex = 1.5, main = "stripchart(Orchardsprays)"))

# Create some linked plots
if(!require(iplots)) install.packages("iplots"); library(iplots)
## Loading required package: iplots
## Loading required package: rJava
ihist(mtcars$mpg) # histogram
## ID:1 Name: "Histogram (mtcars$mpg)"
ibar(mtcars$carb) # barchart
## ID:2 Name: "Barchart (mtcars$carb)"
iplot(mtcars$mpg, mtcars$wt) # scatter plot
## ID:3 Name: "Scatterplot (mtcars$wt vs mtcars$mpg)"
ibox(mtcars[c("qsec","disp","hp")]) # boxplots
## ID:4 Name: "Boxplot (qsec)"
ipcp(mtcars[c("mpg","wt","hp")]) # parallel coordinates
## ID:5 Name: "Parallel coord. plot (default)"
# 참고로 알면 좋은 함수
# (1) Expression
curve(x ^ 3 - 3 * x, -2, 2)
title(main = "User defined expression")

myfun <- function(x) x ^ 2 + 2
# (2) User Function
curve(myfun, -pi, pi)
title(main = "User defined function")

# (3) R Function
curve(dnorm, from = -3, to = 3)
title(main = "Normal distribution density")

# (4) plot Function
plot(dnorm, from = -3, to = 3)
title(main = "curve by plot function")

curve(dnorm, from = -3, to = 3, n = 10)
title(main = "dnorm by n = 10")

curve(dnorm, from = -1, to = 1, xlim = c(-3, 3))
title(main = "dnorm by from=-1, to=1, xlim=c(-3,3)")

curve(sin, from = -2 * pi, to = 2 * pi, lty = 1, col = "red")
curve(cos, from = -2 * pi, to = 2 * pi, lty = 2, col = "blue", add = T)
title(main = "add = TRUE")

curve(dnorm, from = -3, to = 3, log = "y")
title(main = "dnorm by log = \"y\"")

# 3.2.8 matplot(), matpoints(), matlines() 함수 *그룹별 그림을 그릴때
set.seed(10)
y1 <- rnorm(20, mean = -3, sd = 1)
set.seed(20)
y2 <- rnorm(20, mean = 0, sd = 1)
set.seed(30)
y3 <- rnorm(20, mean = 3, sd = 1)
mat <- cbind(y1, y2, y3)
mat
## y1 y2 y3
## [1,] -2.981254 1.16268529 1.711482
## [2,] -3.184253 -0.58592447 2.652311
## [3,] -4.371331 1.78546500 2.478371
## [4,] -3.599168 -1.33259371 4.273473
## [5,] -2.705455 -0.44656677 4.824521
## [6,] -2.610206 0.56960612 1.488692
## [7,] -4.208076 -2.88971761 3.110508
## [8,] -3.363676 -0.86901834 2.239204
## [9,] -4.626673 -0.46170268 2.330103
## [10,] -3.256478 -0.55554091 3.274520
## [11,] -1.898220 -0.02013537 1.976728
## [12,] -2.244218 -0.15038222 1.180602
## [13,] -3.238234 -0.62812676 2.332210
## [14,] -2.012555 1.32322085 2.940702
## [15,] -2.258610 -1.52135057 3.880166
## [16,] -2.910653 -0.43742787 3.268513
## [17,] -3.954944 0.97057758 2.980421
## [18,] -3.195150 0.02822264 2.475053
## [19,] -2.074479 -0.08578219 1.590669
## [20,] -2.517021 0.38921440 1.166011
matplot(y1, type = "l", main = "One vecter argument")

matplot(y1, y2, main = "Two vecter arguments")

matplot(mat, main = "Matrix argument")

matplot(mat, type = "n", main = "Add matlines, matpoints")
matlines(mat)
matpoints(mat)

matplot(mat, type = "lSo", main = "type = \"lSo\"")

matplot(mat, type = c("l","S","o"), main = "type = c(\"l\", \"S\", \"o\")")

matplot(mat, col = c("red","blue","green"), cex = c(1, 1.2, 1.4))
title(main = "c(\"red\", \"blue\", \"green\"), cex=c(1, 1.2, 1.4)")

matplot(mat, type = "l", lty = 3:5, lwd = 1:3)
title(main = "lty = 3:5, lwd = 1:3")

matplot(mat)
matplot(rnorm(20), type = "l", add = TRUE)
title(main = "matplot add matplot")

matplot(mat, type = "n")
matlines(rnorm(20), type = "p")
title(main = "matlines type = \"p\"")

matplot(mat, type = "n")
matpoints(rnorm(20), type = "l")
title(main = "matpoints type = \"l\"")

matplot(mat, pch = 1:3, col = 3:5, verbose = TRUE) # matplot 정보가 출력됨
## matplot: doing 3 plots with col= ("3" "4" "5") pch= ("1" "2" "3") ...
title(main = "pch = 1:3")

# 3.2.9 qqnrom(), qqline(), qqplot() 함수 * 분위수 그림 (생략)
data1 <- round(rnorm(100, 3, 2), 2)
set.seed(2)
data2 <- round(rnorm(100, 3, 2), 2)
qqplot(data1, data2, main = "Q-Q 플롯")
abline(0,1)

plot(sort(data1), sort(data2), main = "plot of sorted data")
abline(0,1)

x <- round(rnorm(10), 2)
y <- round(rnorm(10), 2)
qqplot(x, y, plot.it = FALSE)
.Last.value
## $help_type
## NULL
qq <- qqplot(x, y, plot.it = FALSE)
qq
## $x
## [1] -0.86 -0.75 -0.42 -0.35 -0.31 0.26 0.94 1.07 2.01 2.05
##
## $y
## [1] -1.03 -0.78 -0.25 0.11 0.46 0.47 0.56 1.15 1.23 1.36
set.seed(1)
data1 <- rnorm(100, 3, 2)
set.seed(2)
data3 <- round(rnorm(50), 2)
qqplot(data1, data3, main = "Q-Q 플롯 length(data1) != length(data3)")
abline(-3/2, 1/2)
abline(0, 1, lty = 2)

seq.odd <- seq(1, 99, 2)
seq.even <- seq(2, 100, 2)
data.odd <- sort(data1)[seq.odd]
data.even <- sort(data1)[seq.even]
data.mean <- (data.odd + data.even ) / 2
plot(data.mean, sort(data3), main = "plot of modified data")
abline(-3/2, 1/2)
abline(0, 1, lty = 2)

set.seed(4)
data4 <- rnorm(50)
seq.norm <- seq(1, 99, 2) / 100 # 분위수를 구하기 위한 시퀀스
qnorm.data <- qnorm(seq.norm) # 표준 정규분포의 분위수 계산
par(mfrow = c(2,1))
qqnorm(data4, main = "Q-Q Norm")
used.qqnorm <- .Last.value # qqnorm이 사용한 데이터 추출
abline(0, 1)
qqline(data4, lty = 2)
plot(qnorm.data, sort(data4), main = "using seq")
abline(0, 1)
qqline(data4, lty = 2)

sort(data4) # data4를 정렬한 순서통계량
## [1] -1.79738202 -1.68804858 -1.48218912 -1.28124663 -0.92802811
## [6] -0.86214614 -0.82099358 -0.75421121 -0.63754350 -0.54249257
## [11] -0.46589588 -0.40451983 -0.37565514 -0.28344457 -0.28294368
## [16] -0.22740542 -0.21314452 -0.10036844 -0.04513712 -0.04420400
## [21] 0.01571945 0.03435191 0.09884369 0.15346418 0.16516902
## [26] 0.16902677 0.18153538 0.21675486 0.38305734 0.56660450
## [31] 0.59289694 0.59598058 0.68927544 0.72390416 0.86113187
## [36] 0.89114465 0.90983915 0.93409617 1.05193258 1.16502684
## [41] 1.24018084 1.25588403 1.28825688 1.29251234 1.30762236
## [46] 1.34370863 1.54081498 1.63561800 1.77686321 1.89653987
sort(used.qqnorm$y) # qqnorm이 사용한 y 좌표값
## NULL
qnorm.data # 표준 정규분포의 분위수
## [1] -2.32634787 -1.88079361 -1.64485363 -1.47579103 -1.34075503
## [6] -1.22652812 -1.12639113 -1.03643339 -0.95416525 -0.87789630
## [11] -0.80642125 -0.73884685 -0.67448975 -0.61281299 -0.55338472
## [16] -0.49585035 -0.43991317 -0.38532047 -0.33185335 -0.27931903
## [21] -0.22754498 -0.17637416 -0.12566135 -0.07526986 -0.02506891
## [26] 0.02506891 0.07526986 0.12566135 0.17637416 0.22754498
## [31] 0.27931903 0.33185335 0.38532047 0.43991317 0.49585035
## [36] 0.55338472 0.61281299 0.67448975 0.73884685 0.80642125
## [41] 0.87789630 0.95416525 1.03643339 1.12639113 1.22652812
## [46] 1.34075503 1.47579103 1.64485363 1.88079361 2.32634787
sort(used.qqnorm$x) # qqnorm이 사용한 x 좌표값
## NULL
# 3.2.10 sunflowerplot() 함수
x <- NULL
y <- NULL
for (i in 1:50) {
x <- c(x, rep(ifelse(i%%10 == 0, 10, i %%10), i))
y <- c(y, rep((i-1) %/% 10 + 1, i))
}
# (1)
t(table(x, y))
## x
## y 1 2 3 4 5 6 7 8 9 10
## 1 1 2 3 4 5 6 7 8 9 10
## 2 11 12 13 14 15 16 17 18 19 20
## 3 21 22 23 24 25 26 27 28 29 30
## 4 31 32 33 34 35 36 37 38 39 40
## 5 41 42 43 44 45 46 47 48 49 50
layout(matrix(c(1, 1, 2, 3), ncol = 2, byrow = T))
sunflowerplot(x, y, ylim = c(0.5, 5.2))
title(main = "sunflowerplot's petals")
text(x=ifelse(1:50 %% 10==0, 10, 1:50 %% 10), y=((1:50 - 1)%/%10+1) - 0.5,
as.character(1:50))
plot(x, y, pch = 16)
title(main = "scatter plot by plot")
plot(jitter(x), jitter(y), pch = 16)
title(main = "scatter plot by plot using jitter")

par(mfrow=c(1,1))
set.seed(101)
x <- sample(1:5, 200, replace = T)
set.seed(102)
y <- sample(1:5, 200, replace = T)
sunflowerplot(x, y)

sunflowerplot(1:10, rep(1, 10), ylim = c(0.5, 5.5), number = 1:10,
pch = 21, col = "blue", bg = "green", cex = 2)
text(5, 1.3, "number = 1:10, pch = 21, col = \"blue\", bg = \"green\", cex = 2", adj = 0.5)
sunflowerplot(1:10, rep(2, 10), add = T, number = 1:10,
seg.col = "gold", size = 1/3, seg.lwd = 2.5)
text(5, 2.3, "add = T, number = 1:10, seg.col = \"gold\", size = 1/3, seg.lwd = 2.5", adj = 0.5)
sunflowerplot(1:10, rep(3, 10), add = T, number = 1:10, cex.fact = 0.5)
text(5, 3.3, "add = T, number = 1:10, cex.fact = 0.5", adj = 0.5)
sunflowerplot(1:10, rep(4, 10), add = T, number = 1:10, cex.fact = 2.0)
text(5, 4.3, "add = T, number = 1:10, cex.fact = 2.0", adj = 0.5)
sunflowerplot(1:10, rep(5, 10), add = T, number = 1:10, rotate = TRUE)
text(5, 5.3, "add = T, number = 1:10, rotate = TRUE", adj = 0.5)

# 3.2.11 symbols() 함수 : 다차원의 자료를 2차원 그림으로 표현
x <- round(rnorm(20), 2)
y <- round(rnorm(20), 2)
z1 <- abs(round(rnorm(20), 2))
z2 <- abs(round(rnorm(20), 2))
z3 <- round(runif(20), 2)
z4 <- round(runif(20), 2)
z5 <- round(runif(20), 2)
symbols(x, y, circles = abs(x), inches = 0.2, bg = 1:20)
title(main = "symbols are circles")

symbols(x, y, squares = abs(x), inches = 0.2, bg = 1:20)
title(main = "symbols are squares")

symbols(x, y, rectangles = cbind(abs(x), abs(y)), inches = 0.2, bg = 1:20)
title(main = "symbols are rectangles")

symbols(x, y, stars = cbind(abs(x), abs(y), z1, z2, z3), inches = 0.2, bg=1:20)
title(main = "symbols are stars")

symbols(x, y, thermometers = cbind(abs(x), abs(y), z4), inches=0.2, bg=1:20)
title(main = "symbols are thermometers")

symbols(x, y, boxplots = cbind(abs(x), abs(y), z3, z4, z5), inches=0.2, bg=1:20)
title(main = "symbols are boxplots")

N <- nrow(trees)
palette(rainbow(N, end = 0.9))
with(trees, {
symbols(Height, Volume, circles = Girth/16, inches = FALSE, bg = 1:N,
fg = "gray30", main = "symbols(*, circles = Girth/16, inches = FALSE)")
symbols(Height, Volume, circles = Girth/16, inches = TRUE, bg = 1:N,
fg = "gray30", main = "symbols(*, circles = Girth/16,inches = TRUE)")
symbols(Height, Volume, circles = Girth/16, inches = 0.1, bg = 1:N,
fg = "gray30", main = "symbols(*, circles = Girth/16, inches = 0.1)")
})



palette("default")
# 3.2.12 assocplot() 함수 : 서로 다른 2개의 범주형 자료의 시각화
HairEyeColor
## , , Sex = Male
##
## Eye
## Hair Brown Blue Hazel Green
## Black 32 11 10 3
## Brown 53 50 25 15
## Red 10 10 7 7
## Blond 3 30 5 8
##
## , , Sex = Female
##
## Eye
## Hair Brown Blue Hazel Green
## Black 36 9 5 2
## Brown 66 34 29 14
## Red 16 7 7 7
## Blond 4 64 5 8
x <- margin.table(HairEyeColor, c(1, 2))
x
## Eye
## Hair Brown Blue Hazel Green
## Black 68 20 15 5
## Brown 119 84 54 29
## Red 26 17 14 14
## Blond 7 94 10 16
assocplot(x, main = "Relation between hair and eye color")

chisq.test(x)
##
## Pearson's Chi-squared test
##
## data: x
## X-squared = 138.29, df = 9, p-value < 2.2e-16
# 3.2.13 fourfolodplot() 함수
admis <- aperm(UCBAdmissions, c(2, 1, 3))
dimnames(admis)[[2]] <- c("Yes", "No")
names(dimnames(admis)) <- c("Sex", "Admit?", "Department")
admis
## , , Department = A
##
## Admit?
## Sex Yes No
## Male 512 313
## Female 89 19
##
## , , Department = B
##
## Admit?
## Sex Yes No
## Male 353 207
## Female 17 8
##
## , , Department = C
##
## Admit?
## Sex Yes No
## Male 120 205
## Female 202 391
##
## , , Department = D
##
## Admit?
## Sex Yes No
## Male 138 279
## Female 131 244
##
## , , Department = E
##
## Admit?
## Sex Yes No
## Male 53 138
## Female 94 299
##
## , , Department = F
##
## Admit?
## Sex Yes No
## Male 22 351
## Female 24 317
ftable(admis) # 전공, 성별, 합격유무
## Department A B C D E F
## Sex Admit?
## Male Yes 512 353 120 138 53 22
## No 313 207 205 279 138 351
## Female Yes 89 17 202 131 94 24
## No 19 8 391 244 299 317
# (1) 성별 합격여부의 데이터
admis.sex <- margin.table(admis, c(1, 2))
admis.sex
## Admit?
## Sex Yes No
## Male 1198 1493
## Female 557 1278
# (2) 성별 합격/불합격 비율
prop.table(admis.sex, 1)
## Admit?
## Sex Yes No
## Male 0.4451877 0.5548123
## Female 0.3035422 0.6964578
# (3)
fourfoldplot(admis.sex)

# (4)
fourfoldplot(admis)

# (5)
fourfoldplot(admis, margin = 2)

prop.table(admis[,,1], 1)
## Admit?
## Sex Yes No
## Male 0.6206061 0.3793939
## Female 0.8240741 0.1759259
prop.table(admis[,,2], 1)
## Admit?
## Sex Yes No
## Male 0.6303571 0.3696429
## Female 0.6800000 0.3200000
prop.table(admis[,,3], 1)
## Admit?
## Sex Yes No
## Male 0.3692308 0.6307692
## Female 0.3406408 0.6593592
prop.table(admis[,,4], 1)
## Admit?
## Sex Yes No
## Male 0.3309353 0.6690647
## Female 0.3493333 0.6506667
prop.table(admis[,,5], 1)
## Admit?
## Sex Yes No
## Male 0.2774869 0.7225131
## Female 0.2391858 0.7608142
prop.table(admis[,,6], 1)
## Admit?
## Sex Yes No
## Male 0.05898123 0.94101877
## Female 0.07038123 0.92961877
# 학부별 합격률
round(prop.table(margin.table(admis, c(2, 3)), 2) * 100, 2)
## Department
## Admit? A B C D E F
## Yes 64.42 63.25 35.08 33.96 25.17 6.44
## No 35.58 36.75 64.92 66.04 74.83 93.56
# 3.2.14 mosaiocplot() 함수
mosaicplot(admis.sex, color = FALSE, las = 0, main = "color = FALSE, las = 0")

mosaicplot(admis.sex, color = TRUE, las = 1, dir = c("h", "v"),
xlab = "Admit?", ylab = "Sex",
main = "color = T, las = 1,dir = c(\"h\", \"v\"), xlab, ylab")

mosaicplot(~ Gender + Admit, data = UCBAdmissions, sort = c(2, 1),
color = 2:3, las = 2,
main = "formula, sort = c(2, 1), color = 2:3, las = 2")

mosaicplot(admis.sex, off = c(5, 20), las = 3, shade = TRUE,
main = "off = c(5, 20), las = 3, shade = TRUE")

mosaicplot(admis, sort = c(3, 1, 2), shade = T, margin = list(c(1, 3), c(2, 3)),
xlab = "Department", main = "Sex, Admit?, Department Mosaic Plots")

# 3.2.15 paris() 함수
pairs(~ Fertility + Education + Catholic, data = swiss,
subset = Education < 20, main = "Swiss data, Education < 20",
col = 1 + (swiss$Agriculture > 50), cex = 1.2,
pch = 1 + (swiss$Agriculture > 50))

pairs(iris[1:4], main = "Anderson's Iris Data--3 species",
pch = 21, bg = c("red", "green3", "blue")[unclass(iris$Species)])

# 대각 패널 함수의 정의
panel.hist <- function(x, ...)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(usr[1:2], 0, 1.5) )
h <- hist(x, plot = FALSE)
breaks <- h$breaks; nB <- length(breaks)
y <- h$counts; y <- y/max(y)
rect(breaks[-nB], 0, breaks[-1], y, col = "cyan", ...)
}
pairs(USJudgeRatings[1:3], panel = panel.smooth,
cex = 1.5, pch = 24, bg = "light blue",
diag.panel = panel.hist, cex.labels = 2, font.labels = 2)

panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r <- abs(cor(x, y))
txt <- format(c(r, 0.123456789), digits = digits)[1]
txt <- paste(prefix, txt, sep = "")
if(missing(cex.cor)) cex <- 0.8 / strwidth(txt)
text(0.5, 0.5, txt, cex = cex * r)
}
pairs(USJudgeRatings[1:3], row1attop = FALSE, gap = 2,
lower.panel = panel.smooth, upper.panel = panel.cor )

# 3.2.16 coplot() 함수
length(quakes$depth)
## [1] 1000
summary(quakes$depth)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 40.0 99.0 247.0 311.4 543.0 680.0
inter <- co.intervals(quakes$depth, number = 4, overlap = 0.1)
inter
## [,1] [,2]
## [1,] 39.5 107.5
## [2,] 96.5 260.5
## [3,] 238.5 544.5
## [4,] 534.5 680.5
inter[, 2] - inter[, 1] # 등 간격이 아님
## [1] 68 164 306 146
length.inter <- as.numeric(0)
for (i in 1:4)
length.inter[i] <- length(quakes$depth[inter[i, 1] <= quakes$depth &
quakes$depth <= inter[i, 2]])
length.inter # 도수의 분포가 균일하게 나눔
## [1] 272 272 271 270
sum(length.inter) # 도수의 합이 1000을 넘음
## [1] 1085
dim(quakes)
## [1] 1000 5
is.data.frame(quakes)
## [1] TRUE
names(quakes)
## [1] "lat" "long" "depth" "mag" "stations"
coplot(lat ~ long | depth, data = quakes)

dim(iris)
## [1] 150 5
is.data.frame(iris)
## [1] TRUE
names(iris)
## [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width"
## [5] "Species"
table(iris$Species)
##
## setosa versicolor virginica
## 50 50 50
coplot(Sepal.Length ~ Sepal.Width | Species, data = iris)

formulas <- lat ~ long | depth * mag
coplot(formulas, data = quakes, col = "red", pch = 2,
number = c(3, 4), bar.bg = c(num = "green", fac = "blue"))

# 3.2.17 satrs() 함수
op <- par(no.readonly = TRUE)
WESTarrests <- USArrests[state.region == "West",] # (1)
par(mfrow = c(2, 2))
stars(WESTarrests, draw.segments = FALSE, len = 0.7, key.loc = c(7, 2)) # (2)
title(main = "Stars Plot, unit key, len = 0.7")
stars(WESTarrests, draw.segments = TRUE, full = FALSE, key.loc = c(7, 2)) # (3)
title(main = "Segments Plot,unit key, full = F")
stars(WESTarrests, locations = c(0, 0), scale = TRUE, radius = FALSE,
col.stars = 0, key.loc = c(0, 0)) # (4)
title(main = "Spider Plot,unit key, scale = T, radius = F")
stars(WESTarrests, locations = 0:1, scale = TRUE, draw.segments = TRUE,
col.segments = 0, col.stars = 0, key.loc = 0:1) # (5)
title(main = "Radar Plot,unit key, scale = T")

par(op)
# 3.2.18 persp() 함수 # 3차원 함수
# (1) sinc 함수를 정의함
x <- seq(-10, 10, length = 30)
y <- x
f <- function(x, y) { r <- sqrt(x ^ 2 + y ^ 2); 10 * sin(r) / r }
z <- outer(x, y, f)
z[is.na(z)] <- 1 # 결측치의 값을 1로 바꾼다.
# (2) sinc 함수를 투시도로 그림
persp(x, y, z, theta = 30, phi = 30, expand = 0.5, col = "lightblue",
ltheta = 120, shade = 0.75, ticktype = "detailed",
xlab = "X", ylab = "Y", zlab = "Sinc( r )") -> res
title(main="Perspective Plots with Sinc Function")
round(res, 3) # persp() 함수의 반환 값
## [,1] [,2] [,3] [,4]
## [1,] 0.087 -0.025 0.043 -0.043
## [2,] 0.050 0.043 -0.075 0.075
## [3,] 0.000 0.074 0.042 -0.042
## [4,] 0.000 -0.273 -2.890 3.890
# (3) 3차원 좌표로 변환하는 함수
trans3d <- function(x, y, z, pmat) {
tr <- cbind(x, y, z, 1) %*% pmat
list(x = tr[,1] / tr[,4], y = tr[,2] / tr[,4])
}
xE <- c(-10,10); xy <- expand.grid(xE, xE)
points(trans3d(xy[,1], xy[,2], 6, pm = res), col = 2, pch = 16)
lines (trans3d(x, y =10, z = 6 + sin(x), pm = res), col = 3)
phi <- seq(0, 2 * pi, len = 201)
r1 <- 7.725
xr <- r1 * cos(phi)
yr <- r1 * sin(phi)
lines(trans3d(xr, yr, f(xr, yr), res), col = "pink", lwd = 2)

z <- 2 * volcano # Exaggerate the relief
x <- 10 * (1:nrow(z)) # 10 meter spacing (S to N)
y <- 10 * (1:ncol(z)) # 10 meter spacing (E to W)
par(bg = "lavender")
persp(x, y, z, theta = 135, phi = 30, col = "green3", scale = FALSE,
ltheta = -120, shade = 0.75, border = NA, box = FALSE)
title("Perspective Plots with volcano")

# 3.2.19 contour() 함수
rx <- range(x <- 10 * 1:nrow(volcano))
ry <- range(y <- 10 * 1:ncol(volcano))
ry <- ry + c(-1, 1) * (diff(rx) - diff(ry)) / 2
tcol <- terrain.colors(12)
plot(x = 0, y = 0, type = "n", xlim = rx, ylim = ry, xlab = "", ylab = "")
u <- par("usr")
rect(u[1], u[3], u[2], u[4], col = tcol[8], border = "red")
contour(x, y, volcano, col = tcol[2], lty = "solid", add = TRUE,
vfont = c("sans serif", "plain"))
title("A Topographic Map of Maunga Whau", font = 4)
abline(h = 200 * 0:4, v = 200 * 0:4, col = "lightgray", lty = 2, lwd = 0.1)

line.list <- contourLines(x, y, volcano) # (1) contourLines 호출
plot(x = 0, y = 0, type = "n", xlim = rx, ylim = ry, xlab = "", ylab = "")
rect(u[1], u[3], u[2], u[4], col = tcol[8], border = "red")
is.list(line.list) # (2) 리스트 여부 확인
## [1] TRUE
length(line.list) # (3) 성분의 개수
## [1] 20
names(line.list[[1]]) # (4) 성분의 이름
## [1] "level" "x" "y"
line.list[[1]] # (5) 첫 번째 성분 출력
## $level
## [1] 100
##
## $x
## [1] 870.0000 860.0000 850.9173 850.0000 840.0000 830.9173 830.0000
## [8] 820.9173 820.0000 810.9173 810.0000 800.9173 800.0000 790.9173
## [15] 790.0000 780.9173 780.0000 770.4807 770.0000 760.3257 760.0000
## [22] 750.3257 750.0000 740.2463 740.0000 730.2463 730.0000 720.3257
## [29] 720.0000 710.4807 710.4807 710.0000 700.9173 700.0000 690.9173
## [36] 690.9173 690.9173 690.0000 680.9173 680.9173 680.9173 680.9173
## [43] 680.0000 670.9173 670.9173 670.9173 670.9173
##
## $y
## [1] 340.9173 340.9173 350.0000 350.9173 350.9173 360.0000 360.9173
## [8] 370.0000 370.9173 380.0000 380.9173 390.0000 390.9173 400.0000
## [15] 400.9173 410.0000 410.4807 420.0000 420.3257 430.0000 430.3257
## [22] 440.0000 440.2463 450.0000 450.2463 460.0000 460.3257 470.0000
## [29] 470.4807 480.0000 490.0000 490.9173 500.0000 500.9173 510.0000
## [36] 520.0000 530.0000 530.9173 540.0000 550.0000 560.0000 570.0000
## [43] 570.9173 580.0000 590.0000 600.0000 610.0000
templines <- function(clines) { # (6) 등고선과 라벨 출력 함수 정의
lines(clines[[2]], clines[[3]])
text(clines[[2]][1], clines[[3]][1], clines[[1]][1], cex = 0.5, col = "blue")
}
invisible(lapply(line.list, templines)) # (7) 등고선을 그린다.
title("A Topographic Map of Maunga Whau by contourLines", font=4)
abline(h = 200 * 0:4, v = 200*0:4, col = "lightgray", lty = 2, lwd = 0.1)

# 3.2.20 image() 함수
image(volcano, zlim = c(150, 200), xaxs = "r", yaxs = "r",
xlab = "West to East", ylab = "South to North")
image(volcano, zlim = c(0, 150), add = T, col = cm.colors(12),
xlab = "0 to 1", ylab = "0 to 1")
title(main = "image & add image")

x <- 10 * (1:nrow(volcano))
y <- 10 * (1:ncol(volcano))
image(x, y, volcano, col = terrain.colors(100), axes = FALSE)
contour(x, y, volcano, levels = seq(90, 200, by = 5),
add = TRUE, col = "peru")
axis(1, at = seq(100, 800, by = 100))
axis(2, at = seq(100, 600, by = 100))
box()
title(main = "Maunga Whau Volcano", font.main = 4)

# 3.2.21 filled.contour 함수()
x <- 10 * 1:nrow(volcano)
y <- 10 * 1:ncol(volcano)
filled.contour(x, y, volcano, color = terrain.colors,
plot.title = title(main = "The Topography of Maunga Whau",
xlab = "Meters North", ylab = "Meters West"),
plot.axes = { axis(1, seq(100, 800, by = 100))
axis(2, seq(100, 600, by = 100)) },
key.title = title(main = "Heightn(meters)"),
key.axes = axis(4, seq(90, 190, by = 10)))
